* Request raw PRIMA data from: https://easy.dans.knaw.nl/ui/datasets/id/easy-dataset:61644
* Combine the data for all the years, keeping the variables Year; Code; School NR; GROEP; VOLGNR; GROEPSNA; GESLACHT; WEEGFACT; GEBMAAND; GEBJAAR; GEBLANDV; GEBLANDM; JAREN; OPLV; OPLM; THETAT; THETAR, and generate a unique student id (ref: excel file 'complete data compilation')
* see excel file 'used variables' for details
* import this file in stata and call it 'working file'
* Running the following do.file on the 'working file' gives us the final results.
* Keep only records with group equal to 2 and school number (nr) below 5000.
keep if group == 2
drop if nr >= 5000

* achter: copy of weegfact collapsed to 0/1 (values 1-2 become 0, values 3-5
* become 1; anything outside those ranges is left unchanged).
generate achter = weegfact
recode achter (1/2 = 0) (3/5 = 1)
label variable achter ">1.25 pupil"
* stdmath: math score standardised (mean 0, sd 1) separately within each
* survey wave 1994, 1996, 1998, 2000 and 2002.
*
* Each wave's z-score is written directly into stdmath. This avoids summing
* per-wave columns with the undocumented egen function rsum() and then
* treating a sum of exactly 0 as "missing", which would also blank out a
* genuine z-score of 0. Observations with missing math, or from a wave not
* listed here, keep stdmath missing.
generate stdmath = .
foreach wave of numlist 1994 1996 1998 2000 2002 {
    tempvar z
    egen `z' = std(math) if year == `wave'
    replace stdmath = `z' if year == `wave'
    drop `z'
}
* stdlang: language score standardised (mean 0, sd 1) separately within each
* survey wave 1994, 1996, 1998, 2000 and 2002.
*
* Each wave's z-score is written directly into stdlang. This avoids summing
* per-wave columns with the undocumented egen function rsum() and then
* treating a sum of exactly 0 as "missing", which would also blank out a
* genuine z-score of 0. Observations with missing lang, or from a wave not
* listed here, keep stdlang missing.
generate stdlang = .
foreach wave of numlist 1994 1996 1998 2000 2002 {
    tempvar z
    egen `z' = std(lang) if year == `wave'
    replace stdlang = `z' if year == `wave'
    drop `z'
}
* Remove records whose birth-month code is one of 0, 20, 22, 70, 88 or 98
* (none of which is a calendar month), then inspect what is left.
drop if inlist(dobm, 0, 20, 22, 70, 88, 98)
tabulate dobm

* Remove records whose birth-year code is below 87; tabulate before and after.
tabulate doby
drop if doby < 87
tabulate doby
* Expand the two-digit birth-year codes 87-94 and 98 to four-digit years.
* NOTE(review): codes 95, 96, 97 and 99 are not converted here. If they occur
* in the data, mdy() will later return a missing birth date for them and those
* records will be dropped - confirm against the tabulation that this is intended.
foreach yy of numlist 87/94 98 {
    replace doby = 1900 + `yy' if doby == `yy'
}
tabulate doby
* Cross-tabulate cobm against birth year as a visual check.
tabulate cobm doby

* Drop records with a system-missing weight factor, sex or birth month,
* then order the data by birth month.
drop if weegfact == . | sex == . | dobm == .
sort dobm

* Manual step: the values of dobr, dobmp, penrollr, penroll0r, penrollmp and penroll0mp were modified for the current data set and pasted into the data editor from the figure 1 file, based on information given by Leuven.
* Shorter names for month and year of birth.
rename dobm mob
rename doby yob

* Keep only records coded as boy (1) or girl (2). This also removes any
* missing sex, so no separate drop of missing values is needed afterwards.
keep if inlist(sex, 1, 2)
label variable sex "1=boy; 2=girl"
tabulate sex

* Remove weight-factor categories 3 and 4; tabulate before and after.
tabulate weegfact
drop if inlist(weegfact, 3, 4)
tabulate weegfact
* Sub-sample indicators (0/1) derived from the weight factor.
* The variable name weegfact is spelled out in full: the previous abbreviation
* "weeg" fails under -set varabbrev off- and becomes ambiguous as soon as any
* other variable starting with "weeg" exists.
* (assumes "weeg" was an abbreviation of weegfact - no separate variable named
* weeg is referenced anywhere else in this script)
generate sample0 = 1                          // all
generate sample1 = (weegfact == 1)            // dutch non-disadv
generate sample2 = (weegfact == 2)            // dutch disadv
generate sample3 = (weegfact == 5)            // non-dutch disadv
generate sample4 = inlist(weegfact, 2, 5)     // all disadv
* Parental education: eduf becomes feduc (father), edum becomes meduc (mother).
rename edum meduc
rename eduf feduc

* Keep only records where the four enrolment variables, year, weight factor
* and sex are all different from system missing.
keep if penrollr != . & penroll0r != . & penrollmp != . & penroll0mp != . ///
      & year != . & weegfact != . & sex != .

* Variable labels for the outcomes and the sub-sample indicators.
label variable stdmath "stdmath"
label variable stdlang "stdlang"
label variable sample0 "All"
label variable sample1 "Non Disadv."
label variable sample2 "Disadv. Dutch"
label variable sample3 "Disadv. Minority"
label variable sample4 "Disadv. All"

* Missing parental education is recoded to category 0.
foreach parent in meduc feduc {
    replace `parent' = 0 if `parent' == .
}
* Birth date assembled from month (mob), day (dobr) and year (yob).
generate bdate = mdy(mob, dobr, yob)
format %d bdate

* Age in months: days between the birth date and the start of week 7 of the
* calendar year following the survey year, rescaled by 12/365.
generate agem = (dofw(yw(year+1,7)) - bdate)*12/365
label variable agem "Age scaled to months"

* Rescaled quadratic and cubic age terms.
generate agem2 = agem^2/100
label variable agem2 "Agem^2/100"
generate agem3 = agem^3/1000
label variable agem3 "Agem^3/1000"

* Drop records lacking a valid birth date or either standardised score.
drop if bdate == .
drop if stdlang == .
drop if stdmath == .

* For mother's and then father's education: tabulate, fold category 5 into
* category 0, and tabulate again to confirm the change.
foreach parent in meduc feduc {
    tabulate `parent'
    replace `parent' = 0 if `parent' == 5
    tabulate `parent'
}

* Generate dummy variables for tables 2 & 3:
* meduc1-meduc4 and feduc1-feduc4 equal 1 when the parent's education is in
* the given category and 0 otherwise (category 0 is the omitted group).
forvalues k = 1/4 {
    generate meduc`k' = (meduc == `k')
}
forvalues k = 1/4 {
    generate feduc`k' = (feduc == `k')
}

* girl: 1 when sex is 2, otherwise 0.
generate girl = (sex == 2)

tabulate year

* Wave dummies year94, year96, year98 and year00 (no dummy is created for 2002).
foreach wave of numlist 1994 1996 1998 2000 {
    local suffix = substr("`wave'", 3, 2)
    generate year`suffix' = (year == `wave')
}

sort nr
* Manual step: add the province information (variable prov) from the excel
* file - PRIMA data set - before running the remainder of this script.

* Records with province code 0 are removed.
drop if prov == 0

label variable prov "Province"

* Value label "prov": maps each province code 1-12 to the name of its region.
* NOTE(review): this label is only defined here; it is never attached to a
* variable with -label values-, so it does not change any output. Attaching it
* to prov would show several provinces under the same name - confirm intent.
label define prov 1 "north" 2 "north" 3 "north" 4 "central" 5 "north" ///
                  6 "south" 7 "south" 8 "north" 9 "north" 10 "central" ///
                  11 "south" 12 "central", add

* Region derived from the province code:
*   1 = north   (provinces 1, 2, 3, 5, 8, 9)
*   2 = central (provinces 4, 10, 12)
*   3 = south   (provinces 6, 7, 11)
* Any other province code leaves reg missing.
generate reg = .
replace reg = 1 if inlist(prov, 1, 2, 3, 5, 8, 9)
replace reg = 2 if inlist(prov, 4, 10, 12)
replace reg = 3 if inlist(prov, 6, 7, 11)
label variable reg "Region"

* Define the value label AND attach it to reg. Previously the label was only
* defined, never attached, so tabulations and regression output showed bare codes.
label define reg 1 "north" 2 "central" 3 "south", add
label values reg reg

* Tabulate after labelling so the region names appear in the output.
tab reg

* Two region dummies: reg_n (north) and reg_c (central); south is the omitted region.
generate reg_n = (reg == 1)
generate reg_c = (reg == 2)

* Eleven province dummies prov1-prov11; province 12 is the omitted category.
forvalues p = 1/11 {
    generate prov`p' = (prov == `p')
}

* Quarter of birth taken from the birth date.
generate qtr = quarter(bdate)
tabulate qtr

* Check the sub-sample sizes, then keep only records that are either
* non-disadvantaged (sample1) or disadvantaged (sample4).
foreach s in sample0 sample1 sample4 {
    tabulate `s'
}
drop if !(sample1 == 1 | sample4 == 1)

* Creating dummies the author's way: xi expands the listed categorical
* variables (and the year x region interaction) into _I* indicator variables.
xi i.year*i.reg i.meduc i.feduc i.weegfact i.sex i.qtr

* Collect the generated indicator names (all except the quarter-of-birth
* ones) in the global macro XVARS.
unab xvars : _Iyear_* _Ireg_* _IyeaXreg_* ///
             _Imeduc_* _Ifeduc_* _Iweegfact_* _Isex_*
global XVARS `xvars'

* Restrict to ages 48-96 months (4 to 8 years); the out-of-range values are
* tabulated first so they can be inspected in the log.
tabulate agem if agem < 48
tabulate agem if agem > 96
drop if agem < 48 | agem > 96

**TABLE 1
* Descriptive statistics, reported separately for each value of every
* sub-sample indicator (sample1, sample2, sample3, sample4, then sample0).
* Within each indicator: means of age and enrolment, frequencies of the
* categorical variables, then means of the two standardised scores.
* Run this block as a whole so the loop macros stay defined.
foreach s in sample1 sample2 sample3 sample4 sample0 {
    sort `s'
    foreach v in agem penrollr penrollmp {
        by `s': summarize `v'
    }
    foreach v in meduc feduc sample1 sample2 sample3 sex {
        by `s': tabulate `v'
    }
    foreach v in stdlang stdmath {
        by `s': summarize `v'
    }
}

**TABLE 2
* Enrolment regressions (dependent variable penrollr), boys and girls pooled,
* standard errors clustered on nr. The outer loop switches the geographic
* controls: first year x region, then year x province. The inner loop runs
* the model within each value of every sub-sample indicator.
* Run this block as a whole so the local macros stay defined.
local controls i.meduc i.feduc sample2 sample3 girl
foreach geo in reg prov {
    foreach s in sample1 sample2 sample3 sample4 sample0 {
        * sample0 is tabulated once, just before its region regression
        if "`geo'" == "reg" & "`s'" == "sample0" {
            tabulate sample0
        }
        sort `s'
        by `s': regress penrollr `controls' year##`geo' agem agem2, cluster(nr)
    }
}

* Regressions for table 2 for boys & girls separately: the same enrolment
* model, estimated within every sub-sample x girl cell, first with
* year x region and then with year x province controls. girl stays in the
* regressor list exactly as before (it is constant within each cell).
* Run this block as a whole so the local macros stay defined.
local controls i.meduc i.feduc sample2 sample3 girl
foreach geo in reg prov {
    foreach s in sample1 sample2 sample3 sample4 sample0 {
        sort `s' girl
        by `s' girl: regress penrollr `controls' year##`geo' agem agem2, cluster(nr)
    }
}

**TABLES 3 & 4
* Achievement regressions with year x region controls, boys and girls pooled.
* For each outcome (language first, then math) and each sub-sample indicator,
* two models are run: linear in age, then with the quadratic age term added.
* The sample4 models additionally control for sample2.
* Standard errors are clustered on nr.
* Run this block as a whole so the local macros stay defined.
local parented meduc1 meduc2 meduc3 meduc4 feduc1 feduc2 feduc3 feduc4
foreach outcome in stdlang stdmath {
    foreach s in sample1 sample2 sample3 sample4 {
        local extra
        if "`s'" == "sample4" {
            local extra sample2
        }
        sort `s'
        foreach agevars in "agem" "agem agem2" {
            by `s': regress `outcome' penrollr `agevars' year##reg `parented' girl `extra', cluster(nr)
        }
    }
}

* Regressions for tables 3 & 4 with year x province controls, boys and girls
* pooled. For each outcome (language first, then math) and each sub-sample
* indicator, two models are run: linear in age, then with the quadratic age
* term added. The sample4 models additionally control for sample2.
* Standard errors are clustered on nr.
* Run this block as a whole so the local macros stay defined.
local parented meduc1 meduc2 meduc3 meduc4 feduc1 feduc2 feduc3 feduc4
foreach outcome in stdlang stdmath {
    foreach s in sample1 sample2 sample3 sample4 {
        local extra
        if "`s'" == "sample4" {
            local extra sample2
        }
        sort `s'
        foreach agevars in "agem" "agem agem2" {
            by `s': regress `outcome' penrollr `agevars' year##prov `parented' girl `extra', cluster(nr)
        }
    }
}

* Regressions for tables 3 & 4 with year x region controls for boys & girls
* separately: every model is estimated within each sub-sample x girl cell.
* For each outcome (language first, then math) there is a linear-age and a
* quadratic-age model; the sample4 models additionally control for sample2.
* girl stays in the regressor list exactly as before (constant within a cell).
* Run this block as a whole so the local macros stay defined.
local parented meduc1 meduc2 meduc3 meduc4 feduc1 feduc2 feduc3 feduc4
foreach outcome in stdlang stdmath {
    foreach s in sample1 sample2 sample3 sample4 {
        local extra
        if "`s'" == "sample4" {
            local extra sample2
        }
        sort `s' girl
        foreach agevars in "agem" "agem agem2" {
            by `s' girl: regress `outcome' penrollr `agevars' year##reg `parented' girl `extra', cluster(nr)
        }
    }
}

* Regressions for tables 3 & 4 with year x province controls for boys & girls
* separately: every model is estimated within each sub-sample x girl cell.
* For each outcome (language first, then math) there is a linear-age and a
* quadratic-age model; the sample4 models additionally control for sample2.
* girl stays in the regressor list exactly as before (constant within a cell).
* Run this block as a whole so the local macros stay defined.
local parented meduc1 meduc2 meduc3 meduc4 feduc1 feduc2 feduc3 feduc4
foreach outcome in stdlang stdmath {
    foreach s in sample1 sample2 sample3 sample4 {
        local extra
        if "`s'" == "sample4" {
            local extra sample2
        }
        sort `s' girl
        foreach agevars in "agem" "agem agem2" {
            by `s' girl: regress `outcome' penrollr `agevars' year##prov `parented' girl `extra', cluster(nr)
        }
    }
}
